import pandas as pd
import numpy as np
import pickle

# Load the match data; the code below reads its Player_1, Player_2, Winner,
# Odd_1 and Odd_2 columns.
atp_tennis = pd.read_csv('atp_tennis.csv')

# Function to calculate head-to-head win rates between players
def calculate_head_to_head_win_rate(data):
    """Compute each player's win rate against every other player.

    Every match is counted from both participants' points of view, so the
    rate for (Player, Opponent) covers all of their meetings regardless of
    which of the two was listed as ``Player_1``.

    Args:
        data: DataFrame with 'Player_1', 'Player_2' and 'Winner' columns,
            one row per match.

    Returns:
        DataFrame with columns 'Player', 'Opponent' and
        'Head-to-Head Win Rate', holding one row per ordered pair of
        distinct players found in ``data``. Pairs with no recorded match
        get a rate of 0.
    """
    unique_players = pd.concat([data['Player_1'], data['Player_2']]).unique()

    # View each match once per participant: as listed, and mirrored.
    as_listed = data[['Player_1', 'Player_2', 'Winner']].rename(
        columns={'Player_1': 'Player', 'Player_2': 'Opponent'}
    )
    mirrored = data[['Player_2', 'Player_1', 'Winner']].rename(
        columns={'Player_2': 'Player', 'Player_1': 'Opponent'}
    )
    perspectives = pd.concat([as_listed, mirrored], ignore_index=True)
    perspectives['won'] = perspectives['Winner'] == perspectives['Player']

    pair_stats = (
        perspectives.groupby(['Player', 'Opponent'])['won']
        .agg(count_total='size', count_wins='sum')
        .reset_index()
    )

    all_pairs = pd.MultiIndex.from_product(
        [unique_players, unique_players], names=['Player', 'Opponent']
    ).to_frame(index=False)
    all_pairs = all_pairs[all_pairs['Player'] != all_pairs['Opponent']]

    h2h_win_rates = all_pairs.merge(pair_stats, on=['Player', 'Opponent'], how='left')

    # Assign the filled columns back instead of calling fillna(inplace=True)
    # on a column selection, which does not reliably update the frame.
    h2h_win_rates['count_wins'] = h2h_win_rates['count_wins'].fillna(0)
    # A total of 1 for pairs that never met keeps the division defined and
    # yields a rate of 0.
    h2h_win_rates['count_total'] = h2h_win_rates['count_total'].fillna(1)
    h2h_win_rates['Head-to-Head Win Rate'] = (
        h2h_win_rates['count_wins'] / h2h_win_rates['count_total']
    )

    return h2h_win_rates.drop(columns=['count_total', 'count_wins'])
 

# Function to estimate win probability based on individual and head-to-head win rates
def estimate_probability(player_1_win_rate, player_2_win_rate, h2h_player_1_win_rate, h2h_player_2_win_rate, performance_weight=0.7, h2h_weight=0.3):
    """Blend overall and head-to-head win rates into two win probabilities.

    Args:
        player_1_win_rate: Overall win rate of player 1.
        player_2_win_rate: Overall win rate of player 2.
        h2h_player_1_win_rate: Player 1's win rate against player 2.
        h2h_player_2_win_rate: Player 2's win rate against player 1.
        performance_weight: Weight applied to the overall win rates.
        h2h_weight: Weight applied to the head-to-head win rates.

    Returns:
        Tuple ``(player_1_probability, player_2_probability)``: each
        player's weighted rate divided by the sum of both weighted rates.
        When that sum is 0 (for example both weights are 0) the result is
        ``(0.5, 0.5)``.
    """
    # With no signal in a pair of rates, treat the players as evenly matched.
    if player_1_win_rate + player_2_win_rate == 0:
        player_1_win_rate = player_2_win_rate = 0.5

    if h2h_player_1_win_rate + h2h_player_2_win_rate == 0:
        h2h_player_1_win_rate = h2h_player_2_win_rate = 0.5

    weighted_player_1_win_rate = performance_weight * player_1_win_rate + h2h_weight * h2h_player_1_win_rate
    weighted_player_2_win_rate = performance_weight * player_2_win_rate + h2h_weight * h2h_player_2_win_rate
    total_weighted_win_rate = weighted_player_1_win_rate + weighted_player_2_win_rate

    # Nothing to normalise by: fall back to the same even split used above
    # rather than dividing by zero.
    if total_weighted_win_rate == 0:
        return 0.5, 0.5

    player_1_probability = weighted_player_1_win_rate / total_weighted_win_rate
    player_2_probability = weighted_player_2_win_rate / total_weighted_win_rate

    return player_1_probability, player_2_probability

  
# Function to update odds based on performance weight and head-to-head weight  
def update_odds(match_id, performance_weight, h2h_weight, data, h2h_win_rates):
    """Recompute the odds of one match from estimated win probabilities.

    Args:
        match_id: Index label of the match row in ``data``.
        performance_weight: Weight for overall win rates, forwarded to
            ``estimate_probability``.
        h2h_weight: Weight for head-to-head win rates, forwarded to
            ``estimate_probability``.
        data: DataFrame with 'Player_1', 'Player_2', 'Odd_1' and 'Odd_2'
            columns.
        h2h_win_rates: DataFrame with 'Player', 'Opponent' and
            'Head-to-Head Win Rate' columns.

    Returns:
        Tuple ``(updated_odd_1, updated_odd_2)``. Each value is the
        reciprocal of the player's estimated probability, or the largest
        value in the matching 'Odd_1' / 'Odd_2' column of ``data`` when
        that probability is 0.

    NOTE(review): reads the module-level ``player_stats`` frame, which is
    not defined in this section. It is filtered on its 'Opponent' column and
    read from its 'Win Rate' column; confirm that schema where it is built.
    """
    match = data.loc[match_id]
    player_1 = match['Player_1']
    player_2 = match['Player_2']

    def first_or_zero(values):
        # A missing entry counts as a rate of 0.
        return values.iloc[0] if not values.empty else 0

    def head_to_head_rate(player, opponent):
        # Match on BOTH columns so the rate belongs to this exact pairing,
        # not to whichever player happens to come first against `opponent`.
        is_pair = (h2h_win_rates['Player'] == player) & (h2h_win_rates['Opponent'] == opponent)
        return first_or_zero(h2h_win_rates.loc[is_pair, 'Head-to-Head Win Rate'])

    player_1_win_rate = first_or_zero(player_stats.loc[player_stats['Opponent'] == player_1, 'Win Rate'])
    player_2_win_rate = first_or_zero(player_stats.loc[player_stats['Opponent'] == player_2, 'Win Rate'])

    h2h_player_1_win_rate = head_to_head_rate(player_1, player_2)
    h2h_player_2_win_rate = head_to_head_rate(player_2, player_1)

    player_1_probability, player_2_probability = estimate_probability(
        player_1_win_rate,
        player_2_win_rate,
        h2h_player_1_win_rate,
        h2h_player_2_win_rate,
        performance_weight,
        h2h_weight,
    )

    # A probability of 0 has no finite reciprocal; cap at the largest odd
    # present in the data instead.
    if player_1_probability == 0:
        updated_odd_1 = data['Odd_1'].max()
    else:
        updated_odd_1 = 1 / player_1_probability

    if player_2_probability == 0:
        updated_odd_2 = data['Odd_2'].max()
    else:
        updated_odd_2 = 1 / player_2_probability

    return updated_odd_1, updated_odd_2
  
# Calculate head-to-head win rates
h2h_win_rates = calculate_head_to_head_win_rate(atp_tennis)
print(h2h_win_rates)

# Collect one record per affected match and build the DataFrame once at the
# end; concatenating onto an empty frame inside the loop re-copies every
# earlier row on each iteration.
# NOTE(review): federer_match_ids is not defined in this section; it is
# assumed to hold index labels of atp_tennis.
updated_odds_columns = ['Match ID', 'Original Odd_1', 'Original Odd_2', 'Updated Odd_1', 'Updated Odd_2']
updated_odds_rows = []

# Loop through the affected matches
for match_id in federer_match_ids:
    # Update the odds for the match
    updated_odd_1, updated_odd_2 = update_odds(match_id, 0.7, 0.3, atp_tennis, h2h_win_rates)

    # Record the original and updated odds side by side
    updated_odds_rows.append({
        'Match ID': match_id,
        'Original Odd_1': atp_tennis.loc[match_id, 'Odd_1'],
        'Original Odd_2': atp_tennis.loc[match_id, 'Odd_2'],
        'Updated Odd_1': updated_odd_1,
        'Updated Odd_2': updated_odd_2,
    })

# Passing the column list keeps the expected columns even when no match
# was processed.
updated_odds_df = pd.DataFrame(updated_odds_rows, columns=updated_odds_columns)

# Display the resulting DataFrame
print(updated_odds_df)

# Write through a context manager so the file is flushed and closed.
with open("./ref_result/updated_odds_df.pkl", "wb") as output_file:
    pickle.dump(updated_odds_df, output_file)